*This is the replication .do file for the Online Appendix findings; it uses two .dta files: PSrevpublishedfieldexp.dta and unifiedregistryFINAL_PS.dta. See further explanations below*


*Processing the publication dataset for RCTs in political science (based on the Wilson and Knutsen (2020) data, downloaded in xls format)*

*parsing categories*
* Split the combined author and affiliation strings into one variable per
* piece: authorindivid1, authorindivid2, ... (separator " AND ") and
* affiliationindivid1, affiliationindivid2, ... (separator ", ").
* The ". " that had been pasted in front of the split commands is the
* Results-window prompt, not part of the command, and has been removed.
* NOTE(review): an affiliation whose own name contains ", " would be cut into
* two pieces by this separator - confirm against the data.
 gen authorindivid=author
 split authorindivid, p(" AND ")

 gen affiliationindivid=affiliation
 split affiliationindivid, p(", ")


 * Encode the string categories as labelled numeric variables
 encode journal, generate(journalnum)
 encode topic,   generate(topicnum)

 * One encoded variable (afin1 ... afin6) per affiliation slot
 forvalues slot = 1/6 {
     encode affiliationindivid`slot', generate(afin`slot')
 }

 * Country codes for the study country; the result lands in _ISO3N_,
 * which is renamed to ccode further down in this file
 kountry country, from(other) stuck

  * Figure A6 (Online Appendix): distribution of experiment topics
  * All years, default histogram scale
  histogram topicnum, discrete barwidth(0.4) ///
      xlabel(1/7, valuelabel angle(45) noticks)
  * Years after 2013 and before 2020, percent scale
  histogram topicnum if year > 2013 & year < 2020, discrete percent barwidth(0.4) ///
      xlabel(1/7, valuelabel angle(45) noticks)
  
  * For tabulating individual authors and affiliations, the variables
  * affiliationindivid1-affiliationindivid6 were stacked up beforehand so
  * that the encoded value labels stay in sync.
  encode affiliationindivid1, generate(univ)
  encode authorindivid1, generate(researchers)

* researcher_freq: number of rows that share the same encoded researcher
bysort researchers: gen researcher_freq = _N

* pioneer: 1 when the researcher appears on more than one row, 0 otherwise
* (researcher_freq comes from _N, so it is never missing)
gen pioneer = researcher_freq > 1

*counting team composition and number for co-authors*
* Split the author string on " AND " and count the non-empty pieces per row.
* The ". " Results-window prompt that had been pasted in front of split has
* been removed.
 gen authorstudy=author
 split authorstudy, p(" AND ")

* split returns the names of the variables it just created in r(varlist).
* Counting over that list, rather than a fixed authorstudy1-authorstudy5
* range, keeps the count right however many co-authors the longest author
* string has (the fixed range errors with fewer than five pieces and
* undercounts with more). Keep this line directly after split so r() is
* still the one split left behind.
egen countauthor = rownonmiss(`r(varlist)'), strok

*generating country of origin for university affiliation*
* corigin1-corigin5 hold the country of the 1st-5th listed affiliation, looked
* up from the numeric codes stored in afin1-afin5. Codes not listed below are
* left as the empty string.
* NOTE(review): the codes are whatever encode assigned on the data at hand;
* re-encoding a changed dataset may shift them - verify before reuse.

* first affiliation
gen corigin1 = ""
replace corigin1 = "Australia"      if afin1 == 1
replace corigin1 = "Denmark"        if afin1 == 2
replace corigin1 = "China"          if afin1 == 3
replace corigin1 = "France"         if afin1 == 9
replace corigin1 = "Switzerland"    if afin1 == 14
replace corigin1 = "United Kingdom" if afin1 == 20
replace corigin1 = "Germany"        if inlist(afin1, 21, 43)
replace corigin1 = "Canada"         if afin1 == 31

* second affiliation
gen corigin2 = ""
replace corigin2 = "United Kingdom" if inlist(afin2, 8, 15)
replace corigin2 = "Germany"        if afin2 == 9
replace corigin2 = "Switzerland"    if afin2 == 13
replace corigin2 = "France"         if afin2 == 17
replace corigin2 = "Spain"          if afin2 == 21
replace corigin2 = "Brazil"         if afin2 == 26
replace corigin2 = "Israel"         if afin2 == 29

* third affiliation
gen corigin3 = ""
replace corigin3 = "Australia"      if afin3 == 5
replace corigin3 = "China"          if afin3 == 12
replace corigin3 = "Russia"         if afin3 == 13
replace corigin3 = "Brazil"         if inlist(afin3, 14, 15)
replace corigin3 = "Estonia"        if afin3 == 17
replace corigin3 = "Japan"          if afin3 == 18
replace corigin3 = "Israel"         if afin3 == 19

* fourth affiliation
gen corigin4 = ""
replace corigin4 = "United Kingdom" if afin4 == 1
replace corigin4 = "Portugal"       if afin4 == 2
replace corigin4 = "Germany"        if afin4 == 4
replace corigin4 = "China"          if afin4 == 6

* fifth affiliation
gen corigin5 = ""
replace corigin5 = "Denmark"        if afin5 == 1


* Affiliations that were not given a country by the explicit mapping are set
* to "United States", for rows with rct==1 only. For slots 2-5 the affiliation
* must also exist (afin not missing); slot 6 is set to "United States"
* wherever a sixth affiliation exists.
* The previous test, corigin==""."" , is not a valid Stata expression. An
* unassigned string variable holds the empty string, which missing() detects.
replace corigin1="United States" if missing(corigin1) & rct==1
replace corigin2="United States" if missing(corigin2) & rct==1 & afin2!=.
replace corigin3="United States" if missing(corigin3) & rct==1 & afin3!=.
replace corigin4="United States" if missing(corigin4) & rct==1 & afin4!=.
replace corigin5="United States" if missing(corigin5) & rct==1 & afin5!=.
gen corigin6="United States" if afin6!=.

* corigin: country of origin used for coding, taken from the first listed affiliation
gen corigin=corigin1

* Statement order matters here. The earlier kountry call on country left its
* result in _ISO3N_; it is renamed to ccode first so that the second kountry
* call (on corigin) can create _ISO3N_ again, which then becomes ocode.
rename _ISO3N_ ccode
kountry corigin, from(other) stuck
rename _ISO3N_ ocode

* Country frequency among the published experiments, plus a high-frequency flag.
* countryfreq is filled in only for rows with a non-missing year after 2013
* (a missing year would otherwise pass year>2013, since missing compares as
* larger than any number).
* NOTE(review): _N counts every row of the country, not only the rows after
* 2013 - confirm that this is the intended count.
bysort ccode: gen countryfreq = _N if year > 2013 & !missing(year)

* highfreqcountry: 1 when countryfreq is at least 2, 0 otherwise, and missing
* where rct is missing. A bare countryfreq>=2 is also true when countryfreq
* is missing, which flagged every row up to 2013 as high-frequency; the
* explicit missing() test prevents that.
gen highfreqcountry = (countryfreq >= 2 & !missing(countryfreq))
replace highfreqcountry = . if rct == .

* Store the processed dataset. The replace option lets the do-file be re-run:
* without it, save stops with an error as soon as the file already exists.
* Note that this overwrites any existing PSrevpublishedfieldexp.dta.
save "PSrevpublishedfieldexp.dta", replace


*checking the country tally*
* Encode each country-of-origin string (corigin1 ... corigin6) into a labelled
* numeric variable origincorrect1 ... origincorrect6 so it can be tabulated.
forvalues slot = 1/6 {
    encode corigin`slot', generate(origincorrect`slot')
}

  
 *The replication code for the Online Appendix findings starts here, based on the pre-processed dataset*
 * The clear option is needed: the origincorrect variables are created after
 * the dataset was saved, so the data in memory have unsaved changes and a
 * plain use would stop with an error instead of loading the file.
  use "PSrevpublishedfieldexp.dta", clear
  
  * Figure A3 (Online Appendix): bar graph of citations against year
  twoway bar citations year

  * Figure A4 (Online Appendix): bar graph of author count against year
  twoway bar countauthor year
  
 * Figure A5 (Online Appendix): top 10 countries of published RCTs. The share
 * is calculated over the pool of countries with at least 2 RCTs published in
 * top Poli Sci journals (countryfreq>=2).
  
  graph hbar if year>2013 & countryfreq>=2 , over(ccode)
    * NOTE(review): countryfreq is only filled in for rows after 2013, and a
    * missing value compares as >= 2, so the next filter also keeps every
    * earlier row - confirm that this is the intended all-years pool.
    graph hbar if  countryfreq>=2 , over(ccode)
    graph hbar  , over(ccode)
	graph hbar if countryfreq >=2 & year>2013, over(ccode)
	
	*note: since 2013, Liberia accounts for an even larger share, 6.39%; over the whole period (2000-2019) it accounts for 3.49%*
	
	* The filter used to read [_N] >=2, which is not a valid expression in an
	* if qualifier. The per-country count is held in countryfreq, so that is
	* tested instead, with missing values excluded explicitly.
	graph hbar if year>2013 & countryfreq>=2 & !missing(countryfreq), over(ccode)
	
	* Figure A2 (Online Appendix): share of experiments published in top Poli Sci journals, by journal
	
  * all years
  histogram journalnum, discrete percent barwidth(0.4) ///
      xlabel(1/7, valuelabel labsize(vsmall) angle(60) noticks)
  * years after 2013
  histogram journalnum if year > 2013, discrete percent barwidth(0.4) ///
      xlabel(1/7, valuelabel labsize(vsmall) angle(60) noticks)
  * years after 2000
  histogram journalnum if year > 2000, discrete percent barwidth(0.4) ///
      xlabel(1/7, valuelabel labsize(vsmall) angle(60) noticks)
  
  *Figure A7 Online Appendix - Network analysis for all published experiments in PS*
  * NOTE(review): nwfromedge and nwplot are presumably from the user-written
  * nwcommands package - confirm it is installed before running.
  
sort year
* Build the network netpublished1 from the edge list given by corigin1 and
* country, using only rows with year after 2013 and before 2020
nwfromedge corigin1 country  if year>2013 & year<2020, name(netpublished1) 
* Plot that network on a circular layout (lab presumably turns on node labels)
nwplot netpublished1, lab  layout(circle)


 * Figure A8 (Online Appendix): network analysis for all pre-registered experiments in PS

* Replace the data in memory with the unified registry dataset
use "unifiedregistryFINAL_PS.dta", clear
sort year

* _merge is not used below (presumably left over from an earlier merge)
drop _merge

* Keep only rows where both the supply and the demand country code are present
drop if codesupplycountry == . | codedemandcountry == .


* Set the display name for selected numeric country codes, applying the same
* code-to-name mapping to the supply side and to the demand side.
foreach side in supply demand {
    replace `side'country = "United Kingdom" if code`side'country == 826
    replace `side'country = "United States"  if code`side'country == 840
    replace `side'country = "DRC"            if code`side'country == 180
    replace `side'country = "Ivory Coast"    if code`side'country == 384
    replace `side'country = "Afghanistan"    if code`side'country == 4
    replace `side'country = "Netherlands"    if code`side'country == 528
    replace `side'country = "Gambia"         if code`side'country == 270
    replace `side'country = "UAE"            if code`side'country == 784
    replace `side'country = "Iceland"        if code`side'country == 352
}



* Build the network netpublishedcomplete3 from the edge list given by
* supplycountry and demandcountry, using only rows with year after 2013 and
* before 2020.
* NOTE(review): nwfromedge and nwplot are presumably from the user-written
* nwcommands package - confirm it is installed before running.
nwfromedge supplycountry demandcountry  if year>2013 & year<2020, name(netpublishedcomplete3) 
* Plot that network on a circular layout (lab presumably turns on node labels)
nwplot netpublishedcomplete3, lab  layout(circle)
